Slip 8

Q.1. Write a Python program to categorize a given news text into one of the 20 available
newsgroup categories, using a Multinomial Naïve Bayes machine learning model.

# Import required libraries
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Fetch the 20 Newsgroups corpus (subset='all'), shuffled with a fixed seed
newsgroups = fetch_20newsgroups(subset='all', shuffle=True, random_state=42)

# Step 2: Hold out 25% of the documents for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups.data,
    newsgroups.target,
    test_size=0.25,
    random_state=42,
)

# Step 3: Chain the three stages so raw text goes in and a category index comes out
pipeline_steps = [
    ('vect', CountVectorizer()),       # raw text -> word-count vectors
    ('tfidf', TfidfTransformer()),     # word counts -> TF-IDF weights
    ('clf', MultinomialNB()),          # Multinomial Naive Bayes on the weighted features
]
text_clf = Pipeline(pipeline_steps)

# Step 4: Fit every stage of the pipeline on the training documents
text_clf.fit(X_train, y_train)

# Step 5: Score the fitted pipeline on the held-out documents
y_pred = text_clf.predict(X_test)
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
print("Model Accuracy:", accuracy_pct, "%")
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=newsgroups.target_names)
print(report)

# Step 6: Classify a handful of hand-written sentences
new_texts = [
    "NASA is planning another mission to Mars by next year.",
    "How can I install Linux on my PC?",
    "The new GPU from Nvidia offers excellent gaming performance.",
    "Jesus and the resurrection is discussed heavily in this document.",
    "Baseball teams are preparing for the World Series this season."
]

predicted = text_clf.predict(new_texts)

# The pipeline returns integer category indices; look up the readable names for display
category_names = newsgroups.target_names
print("\n--- Predictions on New Text ---")
for sentence, category_idx in zip(new_texts, predicted):
    print(f"\nText: {sentence}\nPredicted Category: {category_names[category_idx]}")


Q.2. Write a Python program to implement a Decision Tree that decides whether or not to play tennis.

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Step 1: Create the dataset
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
                'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
                    'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
                 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Windy': ['False', 'True', 'False', 'False', 'False', 'True', 'True',
              'False', 'False', 'False', 'True', 'True', 'False', 'True'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
             'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
}

df = pd.DataFrame(data)
print("Original Dataset:\n", df, "\n")

# Step 2: Encode categorical values into numeric format
# Each column gets its OWN LabelEncoder. Re-fitting one shared encoder on every
# column leaves it knowing only the last column's labels ('No'/'Yes'), so a later
# transform(['Sunny']) would raise ValueError for an unseen label.
encoders = {}
for col in df.columns:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Preserve the name `le`; as before, it ends up fitted on the target column.
le = encoders['Play']

print("Encoded Dataset:\n", df, "\n")

# Step 3: Split dataset into features and target
feature_cols = ['Outlook', 'Temperature', 'Humidity', 'Windy']
X = df[feature_cols]
y = df['Play']

# Step 4: Create and train the Decision Tree model
model = DecisionTreeClassifier(criterion='entropy', random_state=42)
model.fit(X, y)

# Step 5: Visualize the Decision Tree
# Class names come from the target encoder so they always match the encoded order.
play_labels = [str(label) for label in encoders['Play'].classes_]
plt.figure(figsize=(10, 6))
plot_tree(model, feature_names=feature_cols,
          class_names=play_labels, filled=True, rounded=True)
plt.title("Decision Tree - Play Tennis")
plt.show()

# Step 6: Example prediction
# Example: Outlook=Sunny, Temperature=Cool, Humidity=High, Windy=False
example_values = {
    'Outlook': 'Sunny',
    'Temperature': 'Cool',
    'Humidity': 'High',
    'Windy': 'False',
}
# Encode each value with the encoder fitted on its own column, and keep the
# training column names so the model receives the same feature layout it was fitted on.
example = pd.DataFrame(
    [[encoders[col].transform([example_values[col]])[0] for col in feature_cols]],
    columns=feature_cols,
)

predicted = model.predict(example)
print("Example Input → Outlook=Sunny, Temperature=Cool, Humidity=High, Windy=False")
# Decode through the target encoder instead of assuming which integer means 'Yes'.
print("Predicted Play Decision:", encoders['Play'].inverse_transform(predicted)[0])
